---
title: "Missingness Replication"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

This file replicates the missingness section of the paper "Mass Digitization of Chinese Court Decisions: How to Use Text as Data in the Field of Chinese Law" by Benjamin Liebman, Margaret Roberts, Rachel Stern, and Alice Wang.

```{r cars}

###########################
# Data Loading and Description of Variables
###########################

# Columns of MissingnessData.csv (one row per court):
#   court                court name
#   transparency         2014 transparency
#   transparency_medadj  2014 transparency, adjusted for mediation
#   GDP.capita           GDP per capita
#   pop                  population
#   transparency16       2016 transparency
#   total16              2016 total cases online
#   officialtotal16      official statistics for total number of cases,
#                        from Court Work Reports
#   perc.divorce14       proportion of cases that included the word "离婚"
#                        (divorce) in the title of the case, 2014
#   perc.divorce16       proportion of cases that included the word "离婚"
#                        (divorce) in the title of the case, 2016

# stringsAsFactors is set explicitly because its default changed from TRUE to
# FALSE in R 4.0; this keeps `court` a character column on every R version so
# the court-name comparisons later in the script behave the same everywhere.
merged <- read.csv("MissingnessData.csv", stringsAsFactors = FALSE)

# Print the first rows as a quick sanity check of the loaded data.
head(merged)

#############################
# Missingness Analysis from 2014
#############################

# Distribution summary of court-level transparency in 2014.
summary(merged[["transparency"]])

# Write the 2014 transparency histogram to a TIFF file in the working
# directory. For a PDF instead, open the device with
# pdf("CourtTransparencyHistogram_2014.pdf").
tiff(filename = "CourtTransparencyHistogram_2014.tiff")
hist(
  x = merged[["transparency"]],
  breaks = 20,
  col = "darkgray",
  main = "",
  xlab = "Cases Online/Official Total, 2014",
  ylab = "Count of Courts",
  xlim = c(0, 1),
  cex.lab = 1.4
)
# Close the device so the file is flushed to disk.
dev.off()

#############################
# Missingness Data from 2016
#############################

# Number of courts with a 2016 online case count.
sum(!is.na(merged$total16))

# Overall 2016 transparency: cases online divided by officially reported
# cases, each pooled across courts.
# NOTE(review): the two sums drop their NAs independently, so a court that
# has only one of the two figures still contributes to that sum -- confirm
# this is the intended denominator.
sum(merged$total16, na.rm = TRUE) / sum(merged$officialtotal16, na.rm = TRUE)

# Distribution summary of court-level transparency in 2016.
summary(merged$transparency16)

# 2014 transparency restricted to the courts that also have a 2016 count,
# for a like-for-like comparison (footnote 21).
summary(merged$transparency[!is.na(merged$total16)])

# Write the 2016 transparency histogram to a TIFF file in the working
# directory. For a PDF instead, open the device with
# pdf("CourtTransparencyHistogram2016.pdf").
tiff("CourtTransparencyHistogram2016.tiff")
hist(merged$transparency16,
     xlab = "Cases Online/Official Total, 2016",
     ylab = "Count of Courts", col = "darkgray", breaks = 20, main = "",
     xlim = c(0, 1), cex.lab = 1.4)
# Close the device so the file is flushed to disk.
dev.off()

#############################
# Mediation Analysis, 2014
#############################

# Distribution summary of 2014 transparency after the mediation adjustment.
summary(merged[["transparency_medadj"]])

#############################
# Analysis of Divorce Decisions
#############################

# ---- 2014 ----

# Distribution summary of the proportion of divorce cases, 2014.
summary(merged$perc.divorce14)

# Simple regression of divorce share on transparency. Columns are referenced
# through `data =` rather than `merged$...` inside the formula, so the
# coefficients carry clean names and the fit can be used with
# predict(newdata = ...).
lm.14 <- lm(perc.divorce14 ~ transparency, data = merged)

# Scatter plot with the fitted line (dashed), written to a TIFF file in the
# working directory. For a PNG instead, open the device with
# png("Divorce2014.png").
tiff("Divorce2014.tiff")
plot(merged$transparency, merged$perc.divorce14,
     pch = 16, xlim = c(0, 1), ylim = c(0, .25),
     xlab = "Transparency, 2014",
     ylab = "Proportion Divorce Cases, 2014",
     cex.lab = 1.4)
abline(lm.14, lty = 2)
dev.off()

# ---- 2016 ----

# Distribution summary of the proportion of divorce cases, 2016.
summary(merged$perc.divorce16)

# Same regression on the 2016 measures.
lm.16 <- lm(perc.divorce16 ~ transparency16, data = merged)

# Scatter plot with the fitted line (dashed), written to a TIFF file in the
# working directory. For a PNG instead, open the device with
# png("Divorce2016.png").
tiff("Divorce2016.tiff")
plot(merged$transparency16, merged$perc.divorce16,
     pch = 16, xlim = c(0, 1), ylim = c(0, .25),
     xlab = "Transparency, 2016",
     ylab = "Proportion Divorce Cases, 2016",
     cex.lab = 1.4)
abline(lm.16, lty = 2)
dev.off()

#############################
# Intermediate Versus Basic Courts
#############################
library(stringr)

# Flag intermediate courts: those whose name contains "中级"
# ("intermediate"). str_detect() is vectorised, so the whole column is
# matched in a single call; as.character() guards against `court` having
# been read in as a factor.
merged$inter <- str_detect(as.character(merged$court), "中级")

# Every other court is treated as a basic court, except the Henan Provincial
# High People's Court, which belongs to neither group.
merged$basic <- !merged$inter & merged$court != "河南省高级人民法院"

# Summary statistics on intermediate court transparency, 2014
# (raw and mediation-adjusted).
summary(merged$transparency[merged$inter])
summary(merged$transparency_medadj[merged$inter])

# Summary statistics on basic court transparency, 2014
# (raw and mediation-adjusted).
summary(merged$transparency[merged$basic])
summary(merged$transparency_medadj[merged$basic])

# Welch two-sample t-tests of intermediate versus basic court transparency,
# 2014 (raw and mediation-adjusted).
t.test(merged$transparency[merged$inter], merged$transparency[merged$basic])
t.test(merged$transparency_medadj[merged$inter],
       merged$transparency_medadj[merged$basic])

# Summary statistics on intermediate court transparency, 2016.
summary(merged$transparency16[merged$inter])

# Summary statistics on basic court transparency, 2016.
summary(merged$transparency16[merged$basic])

# Welch two-sample t-test of intermediate versus basic court transparency,
# 2016.
t.test(merged$transparency16[merged$inter], merged$transparency16[merged$basic])

##########################################################
# Relationship between GDP and population, 2014, Appendix B
##########################################################
library(stargazer)

# Rescale GDP per capita to thousands. Population enters the models
# untransformed.
# NOTE(review): an earlier comment here said population was logged, but no
# log transform is applied in this script -- confirm against Appendix B.
merged$GDP.capita1000 <- merged$GDP.capita / 1000

# ---- Intermediate courts ----
# Regress 2014 transparency on population, on GDP per capita, and on both.
intermediate <- merged[merged$inter, ]
lm.1 <- lm(transparency ~ pop, data = intermediate)
lm.2 <- lm(transparency ~ GDP.capita1000, data = intermediate)
lm.3 <- lm(transparency ~ GDP.capita1000 + pop, data = intermediate)

# Text regression table; stars at the .05 and .01 levels.
stargazer(lm.1, lm.2, lm.3, type = "text", star.cutoffs = c(.05, .01))

# ---- Basic courts ----
# Same three specifications on the rows flagged as basic courts.
# Note: lm.1-lm.3 are reused, so the intermediate-court fits above are
# overwritten from here on.
notintermediate <- merged[merged$basic, ]
lm.1 <- lm(transparency ~ pop, data = notintermediate)
lm.2 <- lm(transparency ~ GDP.capita1000, data = notintermediate)
lm.3 <- lm(transparency ~ GDP.capita1000 + pop, data = notintermediate)

stargazer(lm.1, lm.2, lm.3, type = "text", star.cutoffs = c(.05, .01))
```

